Load libraries and data

library(Seurat)
library(tidyverse)
library(SingleR)
library(celldex)
library(knitr)
library(Azimuth)
library(cowplot)
library(grDevices)

set.seed(4)

# Top-level folder where script outputs should be stored
script_output_dir <- file.path(here::here(), "output")

day3_clstr <- readRDS(file = file.path(script_output_dir, "processed_data/5_singlets_with_adt_d3.rds"))
day15_clstr <- readRDS(file = file.path(script_output_dir, "processed_data/5_singlets_with_adt_d15.rds"))

Starting number of cells

length(day3_clstr$orig.ident)
## [1] 12065
length(day15_clstr$orig.ident)
## [1] 4691

Annotate with SingleR

Use SingleR for high-level clusters.

# Load reference data
# Note, must downgrade dbplyr to version 2.3.4 to avoid 'collect()' error
ref_data <- HumanPrimaryCellAtlasData(ensembl = FALSE, cell.ont = "all")

Day 3

# Note that better to use raw counts than SCTransform data (see SingleR issue #98)
d3_norm_count <- GetAssayData(day3_clstr, layer = "counts", assay = "RNA")

# Annotate
d3_predictions <- SingleR(test = d3_norm_count, 
                          ref = ref_data,
                          labels = ref_data$label.main)
day3_clstr[["SingleR_labels"]] <- d3_predictions$labels
day3_clstr[["SingleR_prunedlabels"]] <- d3_predictions$pruned.labels

# QC: Confidence scores
d3_clusts <- day3_clstr$seurat_clusters

plotScoreHeatmap(d3_predictions, 
                 clusters = d3_clusts,
                 order_by = "clusters")

# QC: Number of cells it couldn't annotate
summary(is.na(day3_clstr$SingleR_prunedlabels))
##    Mode   FALSE    TRUE 
## logical   12061       4

Visualize

Idents(day3_clstr) <- "SingleR_prunedlabels"
DimPlot(day3_clstr, group.by = "SingleR_prunedlabels") + ggtitle("Day 3 SingleR (Human Cell Atlas)")

# Highlight cells it couldn't annotate
d3_unannot <- day3_clstr@meta.data[is.na(day3_clstr@meta.data$SingleR_prunedlabels),]
d3_num_unannot <- as.character(nrow(d3_unannot))
DimPlot(day3_clstr, cells.highlight = rownames(d3_unannot)) +
  NoLegend() +
  ggtitle(paste0("Day 3: Cells without SingleR pruned label (n=", d3_num_unannot, ")"))

Day 15

# Note that better to use raw counts than SCTransform data (see SingleR issue #98)
d15_norm_count <- GetAssayData(day15_clstr, layer = "counts", assay = "RNA")

# Annotate
d15_predictions <- SingleR(test = d15_norm_count, ref = ref_data,
                           labels = ref_data$label.main)
day15_clstr[["SingleR_labels"]] <- d15_predictions$labels
day15_clstr[["SingleR_prunedlabels"]] <- d15_predictions$pruned.labels

# QC: Confidence scores
d15_clusts <- day15_clstr$seurat_clusters

plotScoreHeatmap(d15_predictions, 
                 clusters = d15_clusts,
                 order_by = "clusters")

# QC: Number of cells it couldn't annotate
summary(is.na(d15_predictions$pruned.labels))
##    Mode   FALSE    TRUE 
## logical    4635      56

Visualize

Idents(day15_clstr) <- "SingleR_prunedlabels"
DimPlot(day15_clstr) + ggtitle("Day 15 SingleR (Human Cell Atlas)")

# Highlight cells it couldn't annotate
d15_unannot <- day15_clstr@meta.data[is.na(day15_clstr@meta.data$SingleR_prunedlabels),]
d15_num_unannot <- as.character(nrow(d15_unannot))
DimPlot(day15_clstr, cells.highlight = rownames(d15_unannot)) +
  NoLegend() +
  ggtitle(paste0("Day 15: Cells without SingleR pruned label (n=", d15_num_unannot, ")"))

54 seems like a lot, I see these are mostly T cells and NK cells. Will see what Azimuth comes up with for annotation.

# SingleR labels of these cells that don't have pruned labels
table(d15_unannot$SingleR_labels)
## 
##         DC Macrophage    NK_cell    T_cells 
##          1          2         20         33

Where are these NK cells located in the UMAP?

unannot_nk <- d15_unannot %>%
  filter(SingleR_labels == "NK_cell") %>%
  rownames()

DimPlot(day15_clstr, cells.highlight = unannot_nk) +
  NoLegend() +
  ggtitle(paste0("Day 15: Cells with NK_cell label and no pruned label (n=", 
                 length(unannot_nk), ")"))

Consolidate annotations

lymphoid = c("T_cells", "NK_cell", "B_cell", "Pre-B_cell_CD34-", "Pro-B_cell_CD34+",
             "HSC_CD34+")
myeloid = c("Monocyte", "Macrophage", "DC", "Neutrophils",
            "GMP",  # Granulocyte-Macrophage Progenitor?
            "CMP")  # Common Myeloid Progenitor?
muscle = c("Fibroblasts", "Smooth_muscle_cells", "Tissue_stem_cells", "iPS_cells")
connective = c("Chondrocytes", "Osteoblasts", "MSC")
nervous = c("Neurons", "Astrocyte")
epithelial = c("Epithelial_cells")
endothelial = c("Endothelial_cells")

day3_clstr@meta.data <- day3_clstr@meta.data %>%
  mutate(eb_atlas_idents = case_when(
    SingleR_prunedlabels %in% lymphoid ~ "Lymphoid",
    SingleR_prunedlabels %in% myeloid ~ "Myeloid",
    SingleR_prunedlabels %in% muscle ~ "Muscle tissue",
    SingleR_prunedlabels %in% connective ~ "Connective tissue",
    SingleR_prunedlabels %in% nervous ~ "Nervous tissue",
    SingleR_prunedlabels %in% epithelial ~ "Epithelial",
    SingleR_prunedlabels %in% endothelial ~ "Endothelial",
    is.na(SingleR_prunedlabels) ~ "Unknown",
    .default = SingleR_prunedlabels
  ))

day15_clstr@meta.data <- day15_clstr@meta.data %>%
  mutate(eb_atlas_idents = case_when(
    SingleR_prunedlabels %in% lymphoid ~ "Lymphoid",
    SingleR_prunedlabels %in% myeloid ~ "Myeloid",
    SingleR_prunedlabels %in% muscle ~ "Muscle tissue",
    SingleR_prunedlabels %in% connective ~ "Connective tissue",
    SingleR_prunedlabels %in% nervous ~ "Nervous tissue",
    SingleR_prunedlabels %in% epithelial ~ "Epithelial",
    SingleR_prunedlabels %in% endothelial ~ "Endothelial",
    is.na(SingleR_prunedlabels) ~ "Unknown",
    .default = SingleR_prunedlabels
  ))

Idents(day3_clstr) <- "eb_atlas_idents"
Idents(day15_clstr) <- "eb_atlas_idents"

Save

day3_clstr_filt <- day3_clstr
day15_clstr_filt <- day15_clstr

saveRDS(day3_clstr_filt, file = file.path(script_output_dir, "processed_data/6_singler_d3.rds"))
saveRDS(day15_clstr_filt, file = file.path(script_output_dir, "processed_data/6_singler_d15.rds"))
day3_clstr_filt <- readRDS(file = file.path(script_output_dir, "processed_data/6_singler_d3.rds"))
day15_clstr_filt <- readRDS(file = file.path(script_output_dir, "processed_data/6_singler_d15.rds"))

Idents(day3_clstr_filt) <- "eb_atlas_idents"
Idents(day15_clstr_filt) <- "eb_atlas_idents"

Visualize

High-level SingleR Human Cell Atlas Clusters

day3_clstr_filt@meta.data <- day3_clstr_filt@meta.data %>%
  mutate(eb_atlas_idents = factor(eb_atlas_idents,
                                     levels = c("Keratinocytes",
                                                "Lymphoid",
                                                "Myeloid",
                                                "Epithelial",
                                                "Endothelial",
                                                "Muscle tissue",
                                                "Connective tissue",
                                                "Nervous tissue",
                                                "Unknown")))

day15_clstr_filt@meta.data <- day15_clstr_filt@meta.data %>%
  mutate(eb_atlas_idents = factor(eb_atlas_idents,
                                     levels = c("Keratinocytes",
                                                "Lymphoid",
                                                "Myeloid",
                                                "Epithelial",
                                                "Endothelial",
                                                "Muscle tissue",
                                                "Connective tissue",
                                                "Nervous tissue",
                                                "Unknown")))

Idents(day3_clstr_filt) <- "eb_atlas_idents"
Idents(day15_clstr_filt) <- "eb_atlas_idents"

clstr_colors <- c("Lymphoid" = "#F8766D", "Myeloid" = "#E68613",
                 "Epithelial" = "#0CB702", "Connective tissue" = "#00BE67", 
                 "Nervous tissue" = "#A52A2A", "Endothelial" = "#00A9FF",
                 "Muscle tissue" = "#C77CFF", "Keratinocytes" = "#FF61CC", 
                 "Unknown" = "gray")

d1_with_leg <- DimPlot(day3_clstr_filt, pt.size = 0.4) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "bottom",
        legend.spacing.x = unit(0.4, 'cm'),
        legend.text = element_text(size=12, margin = margin(t = 1))) +
  scale_color_manual(values = clstr_colors) +
  guides(color = guide_legend(ncol = 5,
                              override.aes = list(size=4))) +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
d1_with_leg

d1 <- DimPlot(day3_clstr_filt, pt.size = 0.4) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_color_manual(values = clstr_colors) +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
d1

d2 <- DimPlot(day15_clstr_filt, pt.size = 0.4) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_color_manual(values = clstr_colors) +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
d2

highlevel_legend <- cowplot::get_legend(d1_with_leg)

Number of cells

Day 3

length(day3_clstr_filt$orig.ident)  # Day 3 total
## [1] 12065
table(day3_clstr_filt$eb_atlas_idents) %>%
  kable(caption = "Day 3 high-level")
Day 3 high-level
Var1 Freq
Keratinocytes 2193
Lymphoid 4150
Myeloid 2507
Epithelial 1335
Endothelial 489
Muscle tissue 1092
Connective tissue 250
Nervous tissue 45
Unknown 4
table(day3_clstr_filt$eb_atlas_idents, day3_clstr_filt$PTID)
##                    
##                       1   5   7   8   9  10  11  12  13  16 Doublet Negative
##   Keratinocytes      45 127 421 205 205 407   0  84 196 503       0        0
##   Lymphoid          274 459 218 420 553 258   0 592 688 688       0        0
##   Myeloid           213 321 142 510 279 107   0 388 285 262       0        0
##   Epithelial         32  95 209  85 121 312   0 106 146 229       0        0
##   Endothelial        27  24  20  99  55  50   0  61  68  85       0        0
##   Muscle tissue      97 112 112 130  88  83   0  98 112 260       0        0
##   Connective tissue  19  42  26  39  16   5   0  36  23  44       0        0
##   Nervous tissue      5   4   2   1   5   6   0   3   9  10       0        0
##   Unknown             0   0   0   2   0   1   0   0   0   1       0        0

Day 15

length(day15_clstr_filt$orig.ident)  # Day 15 total
## [1] 4691
table(day15_clstr_filt$eb_atlas_idents) %>%
  kable(caption = "Day 15 high-level")
Day 15 high-level
Var1 Freq
Keratinocytes 1345
Lymphoid 1610
Myeloid 480
Epithelial 495
Endothelial 193
Muscle tissue 422
Connective tissue 63
Nervous tissue 27
Unknown 56
table(day15_clstr_filt$eb_atlas_idents, day15_clstr_filt$PTID)
##                    
##                       1   5   7   8   9  10  11  12  13  16 Doublet Negative
##   Keratinocytes     289   0   0   0 261  22 546   0 116 111       0        0
##   Lymphoid          331   0   0   0 498 103 197   0 343 138       0        0
##   Myeloid            89   0   0   0 168  38  42   0  58  85       0        0
##   Epithelial         79   0   0   0 102  26 215   0  36  37       0        0
##   Endothelial        53   0   0   0  16  12  83   0  18  11       0        0
##   Muscle tissue     140   0   0   0  67  42 103   0  53  17       0        0
##   Connective tissue  24   0   0   0   5   4  20   0   9   1       0        0
##   Nervous tissue     12   0   0   0   2   1   8   0   3   1       0        0
##   Unknown            27   0   0   0   7   3   8   0   6   5       0        0

Markers

Look at top 5 positive markers by logFC for each cell type.

# Day 3
singler_markers_d3 <- FindAllMarkers(day3_clstr_filt, only.pos = TRUE, 
                                     min.pct = 0.25, 
                                     logfc.threshold = 0.25,
                                     # Use MAST with raw counts
                                     assay = "RNA",
                                     slot = "counts",
                                     test.use = "MAST")
singler_markers_d3 %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC) %>%
  kable(caption = "Day 3: Top 5 Markers")
Day 3: Top 5 Markers
p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
0.0000000 6.565390 0.428 0.110 0.0000000 Keratinocytes CNFN
0.0000000 6.099586 0.806 0.268 0.0000000 Keratinocytes KRTDAP
0.0000000 5.980603 0.413 0.054 0.0000000 Keratinocytes KRT1
0.0000000 5.937167 0.417 0.076 0.0000000 Keratinocytes CALML5
0.0000000 5.523713 0.495 0.153 0.0000000 Keratinocytes S100A7
0.0000000 5.754953 0.400 0.009 0.0000000 Lymphoid LCK
0.0000000 5.743634 0.310 0.007 0.0000000 Lymphoid CD3D
0.0000000 5.463430 0.302 0.008 0.0000000 Lymphoid CD3E
0.0000000 5.115741 0.276 0.009 0.0000000 Lymphoid SPOCK2
0.0000000 4.651658 0.259 0.015 0.0000000 Lymphoid CD7
0.0000000 7.411502 0.293 0.002 0.0000000 Myeloid LILRB4
0.0000000 7.161655 0.788 0.069 0.0000000 Myeloid LYZ
0.0000000 6.865403 0.450 0.013 0.0000000 Myeloid IL1B
0.0000000 6.553915 0.302 0.005 0.0000000 Myeloid MS4A7
0.0000000 6.538311 0.387 0.007 0.0000000 Myeloid SERPINA1
0.0000000 4.434207 0.285 0.024 0.0000000 Epithelial KRT15
0.0000000 3.738123 0.345 0.054 0.0000000 Epithelial LAMB3
0.0000000 3.689183 0.418 0.040 0.0000000 Epithelial COL17A1
0.0000000 3.588351 0.816 0.324 0.0000000 Epithelial S100A2
0.0000000 3.179110 0.258 0.042 0.0000000 Epithelial LAMC2
0.0000000 8.700318 0.317 0.001 0.0000000 Endothelial NR5A2
0.0000000 8.692648 0.417 0.002 0.0000000 Endothelial TIE1
0.0000000 8.577542 0.301 0.003 0.0000000 Endothelial SELE
0.0000000 8.541233 0.301 0.001 0.0000000 Endothelial SOX17
0.0000000 8.472049 0.703 0.007 0.0000000 Endothelial PLVAP
0.0000000 6.157499 0.291 0.007 0.0000000 Muscle tissue TMEM119
0.0000000 5.894402 0.640 0.021 0.0000000 Muscle tissue PRRX1
0.0000000 5.677099 0.501 0.017 0.0000000 Muscle tissue MEDAG
0.0000000 5.594135 0.543 0.021 0.0000000 Muscle tissue COL3A1
0.0000000 5.578659 0.417 0.013 0.0000000 Muscle tissue PRKG1
0.0000000 4.705894 0.544 0.059 0.0000000 Connective tissue MT1M
0.0000000 4.422745 0.416 0.030 0.0000000 Connective tissue PLAC9
0.0000000 4.358383 0.544 0.037 0.0000000 Connective tissue CXCL12
0.0000000 4.275321 0.340 0.024 0.0000000 Connective tissue PLA2G2A
0.0000000 4.008408 0.260 0.023 0.0000000 Connective tissue MT1A
0.0000000 8.811318 0.422 0.001 0.0000000 Nervous tissue CDH19
0.0000000 8.016902 0.422 0.002 0.0000000 Nervous tissue PLP1
0.0000000 6.071943 0.333 0.008 0.0000000 Nervous tissue MIA
0.0000000 5.990907 0.267 0.006 0.0000000 Nervous tissue FRMD5
0.0000000 5.515328 0.267 0.011 0.0000000 Nervous tissue NRXN1
0.0001235 10.558062 0.250 0.000 1.0000000 Unknown HPGDS
0.0077887 10.558062 0.250 0.000 1.0000000 Unknown AL022341.2
0.0089853 10.236134 0.250 0.000 1.0000000 Unknown AC011944.1
0.0096183 9.973099 0.250 0.000 1.0000000 Unknown IL22RA2
0.0000048 9.857622 0.250 0.001 0.1231517 Unknown AC006230.1
# Day 15
singler_markers_d15 <- FindAllMarkers(day15_clstr_filt, only.pos = TRUE, 
                                     min.pct = 0.25, 
                                     logfc.threshold = 0.25,
                                     # Use MAST with raw counts
                                     assay = "RNA",
                                     slot = "counts",
                                     test.use = "MAST")
singler_markers_d15 %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC) %>%
  kable(caption = "Day 15: Top 5 Markers")
Day 15: Top 5 Markers
p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
0.0000000 6.9723688 0.425 0.074 0.00e+00 Keratinocytes CALML5
0.0000000 6.8759014 0.709 0.182 0.00e+00 Keratinocytes KRTDAP
0.0000000 6.6643508 0.281 0.036 0.00e+00 Keratinocytes KRT2
0.0000000 6.5777890 0.797 0.268 0.00e+00 Keratinocytes KRT1
0.0000000 6.5383942 0.379 0.015 0.00e+00 Keratinocytes DSC1
0.0000000 8.9447666 0.301 0.001 0.00e+00 Lymphoid CTLA4
0.0000000 8.2351296 0.332 0.003 0.00e+00 Lymphoid GZMA
0.0000000 8.0777485 0.470 0.003 0.00e+00 Lymphoid TRAT1
0.0000000 8.0570675 0.669 0.006 0.00e+00 Lymphoid CD2
0.0000000 8.0378760 0.324 0.002 0.00e+00 Lymphoid SH2D1A
0.0000000 9.2205194 0.304 0.001 0.00e+00 Myeloid TLR8
0.0000000 8.9358580 0.419 0.004 0.00e+00 Myeloid FPR3
0.0000000 8.8306034 0.585 0.008 0.00e+00 Myeloid LILRB4
0.0000000 8.6985667 0.352 0.001 0.00e+00 Myeloid FCAR
0.0000000 8.6876454 0.485 0.005 0.00e+00 Myeloid CD86
0.0000000 3.6611581 0.521 0.103 0.00e+00 Epithelial LAMB3
0.0000000 3.3453214 0.366 0.051 0.00e+00 Epithelial NRG1
0.0000000 3.3074881 0.475 0.115 0.00e+00 Epithelial LAMC2
0.0000000 3.0008771 0.335 0.060 0.00e+00 Epithelial P3H2
0.0000000 2.6492344 0.539 0.120 0.00e+00 Epithelial ITGA2
0.0000000 10.4252540 0.648 0.004 0.00e+00 Endothelial PLVAP
0.0000000 10.3305135 0.492 0.001 0.00e+00 Endothelial MYCT1
0.0000000 10.1175197 0.534 0.002 0.00e+00 Endothelial CLDN5
0.0000000 10.0591864 0.306 0.002 0.00e+00 Endothelial SELE
0.0000000 9.8988089 0.554 0.005 0.00e+00 Endothelial CCL14
0.0000000 7.4179411 0.289 0.008 0.00e+00 Muscle tissue LINC01060
0.0000000 7.0628205 0.403 0.009 0.00e+00 Muscle tissue PDE3A
0.0000000 7.0094285 0.412 0.107 0.00e+00 Muscle tissue MMP1
0.0000000 6.8328116 0.791 0.034 0.00e+00 Muscle tissue PRKG1
0.0000000 6.6206186 0.273 0.004 0.00e+00 Muscle tissue ENPEP
0.0000000 5.5049964 0.254 0.008 0.00e+00 Connective tissue FIBIN
0.0000000 5.2443362 0.270 0.026 0.00e+00 Connective tissue CH25H
0.0000000 5.1724210 0.302 0.013 0.00e+00 Connective tissue OMD
0.0000000 5.1317790 0.254 0.008 0.00e+00 Connective tissue MMP27
0.0000000 4.9358588 0.270 0.011 0.00e+00 Connective tissue FREM1
0.0000000 10.8780240 0.593 0.012 0.00e+00 Nervous tissue TYRP1
0.0000000 10.0840981 0.593 0.007 0.00e+00 Nervous tissue DCT
0.0000000 10.0059314 0.704 0.001 0.00e+00 Nervous tissue CDH19
0.0000000 9.8207352 0.593 0.002 0.00e+00 Nervous tissue TYR
0.0000000 9.6972923 0.481 0.010 0.00e+00 Nervous tissue MLANA
0.0000000 3.2906253 0.286 0.009 0.00e+00 Unknown TPSAB1
0.0000000 3.0944311 0.250 0.026 5.18e-05 Unknown KIT
0.0001721 0.5231372 0.161 0.255 1.00e+00 Unknown CSKMT

CD45 (aka PTPRC) expression

DefaultAssay(day3_clstr_filt) <- "SCT"
DefaultAssay(day15_clstr_filt) <- "SCT"

# CD45 expression
cd45_d3 <- FeaturePlot(day3_clstr_filt, features = "PTPRC", label = TRUE, repel = TRUE) +
  NoLegend() +
  ggtitle("Day 3: CD45 ('PTPRC')")
cd45_d3

cd45_d15 <- FeaturePlot(day15_clstr_filt, features = "PTPRC", label = TRUE, repel = TRUE) +
  NoLegend() +
  ggtitle("Day 15: CD45 ('PTPRC')")
cd45_d15

Other specific markers

  • Lymphoid: IL7R
  • Myeloid: LYZ
  • Epithelial: S100A2
  • Connective tissue: DCN
  • Nervous tissue: DCT
  • Endothelial: FLT1
  • Muscle tissue: COL3A1
  • Keratinocyte: DMKN (epidermis-specific secreted protein)

Day 3

make_marker_plot <- function(obj, marker) {
  out <- FeaturePlot(obj, features = marker, pt.size = 0.4) +
  theme(legend.position = "none",
        axis.line=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        plot.title = element_text(hjust = 0.5, 
                                  size = 20,
                                  face = "italic"))
  return(out)
}

d3_1 <- make_marker_plot(day3_clstr_filt, "IL7R")
d3_2 <- make_marker_plot(day3_clstr_filt, "LYZ")
d3_3 <- make_marker_plot(day3_clstr_filt, "S100A2")
d3_4 <- make_marker_plot(day3_clstr_filt, "DCN")
d3_5 <- make_marker_plot(day3_clstr_filt, "DCT")
d3_6 <- make_marker_plot(day3_clstr_filt, "FLT1")
d3_7 <- make_marker_plot(day3_clstr_filt, "COL3A1")
d3_8 <- make_marker_plot(day3_clstr_filt, "DMKN")

d3_grid <- plot_grid(d3_1, d3_2, d3_3, d3_4, d3_5, d3_6, d3_7, d3_8, nrow = 2)
d3_grid

Day 15

d15_1 <- make_marker_plot(day15_clstr_filt, "IL7R")
d15_2 <- make_marker_plot(day15_clstr_filt, "LYZ")
d15_3 <- make_marker_plot(day15_clstr_filt, "S100A2")
d15_4 <- make_marker_plot(day15_clstr_filt, "DCN")
d15_5 <- make_marker_plot(day15_clstr_filt, "DCT")
d15_6 <- make_marker_plot(day15_clstr_filt, "FLT1")
d15_7 <- make_marker_plot(day15_clstr_filt, "COL3A1")
d15_8 <- make_marker_plot(day15_clstr_filt, "DMKN")

d15_grid <- plot_grid(d15_1, d15_2, d15_3, d15_4, d15_5, d15_6, d15_7, d15_8, nrow = 2)
d15_grid

Sub-cluster the immune cells

First subset and remove current normalized counts

day3_sub <- subset(day3_clstr_filt, idents = c("Lymphoid", "Myeloid"))
DefaultAssay(day3_sub) <- "RNA"
day3_sub[["SCT"]] <- NULL

day15_sub <- subset(day15_clstr_filt, idents = c("Lymphoid", "Myeloid"))
DefaultAssay(day15_sub) <- "RNA"
day15_sub[["SCT"]] <- NULL

Cluster again

Same norm_and_reduce from GEX QC script.

norm_and_reduce <- function(dm_singlets) {
  dm_singlets <- SCTransform(dm_singlets, 
                             vars.to.regress = "percent.mt",
                             verbose = FALSE)
  dm_singlets <- RunPCA(dm_singlets, verbose = FALSE)  
  dm_singlets <- FindNeighbors(dm_singlets, dims = 1:30)
  dm_singlets <- RunUMAP(dm_singlets, dims = 1:30)
  return(dm_singlets)
}

Day 3

Resolution = 0.4 is what Jolie used for the HAARVI paper and seems reasonable here.

day3_sub <- norm_and_reduce(day3_sub)
## Computing nearest neighbor graph
## Computing SNN
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:13:33 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:13:33 Read 6657 rows and found 30 numeric columns
## 16:13:33 Using Annoy for neighbor search, n_neighbors = 30
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:13:33 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:13:34 Writing NN index file to temp file /tmp/RtmpFeilP1/file71b2142d7a4a
## 16:13:34 Searching Annoy index using 1 thread, search_k = 3000
## 16:13:36 Annoy recall = 100%
## 16:13:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:13:39 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:13:39 Commencing optimization for 500 epochs, with 280390 positive edges
## 16:13:46 Optimization finished
day3_sub_clstr <- FindClusters(day3_sub, verbose = FALSE, resolution = 0.4)

Visualize

p7 <- DimPlot(day3_sub_clstr, label = TRUE) + 
  NoLegend() +
  ggtitle("Day 3 Seurat Sub-Clusters")
p7

Day 15

day15_sub <- norm_and_reduce(day15_sub)
## Computing nearest neighbor graph
## Computing SNN
## 16:14:02 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:14:02 Read 2090 rows and found 30 numeric columns
## 16:14:02 Using Annoy for neighbor search, n_neighbors = 30
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## 16:14:02 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:14:02 Writing NN index file to temp file /tmp/RtmpFeilP1/file71b2435d16ac
## 16:14:02 Searching Annoy index using 1 thread, search_k = 3000
## 16:14:03 Annoy recall = 100%
## 16:14:04 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:14:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:14:06 Commencing optimization for 500 epochs, with 81080 positive edges
## 16:14:09 Optimization finished
day15_sub_clstr <- FindClusters(day15_sub, verbose = FALSE, resolution = 0.4)

Visualize

p8 <- DimPlot(day15_sub_clstr, label = TRUE) + 
  NoLegend() +
  ggtitle("Day 15 Seurat Sub-Clusters")
p8

p7 + p8

Annotate with Azimuth

Use Azimuth (part of Seurat and recommended by recent review paper)

The web app FAQs have helpful information for interpreting results: https://azimuth.hubmapconsortium.org/#How%20can%20I%20tell%20if%20my%20mapping%20results%20are%20accurate%3f

day3_sub_annot <- RunAzimuth(day3_sub_clstr, reference = "pbmcref")
DimPlot(day3_sub_annot, group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE) + 
  ggtitle("Day 3 Azimuth Annotations") + 
  NoLegend()

day15_sub_annot <- RunAzimuth(day15_sub_clstr, reference = "pbmcref")
DimPlot(day15_sub_annot, group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE) + 
  ggtitle("Day 15 Azimuth Annotations") + 
  NoLegend()

Consolidate Clusters

To compare with Jolie’s data and make things tidier.

consol_azi <- function(in_obj) {
  Idents(in_obj) <- "predicted.celltype.l2"
  in_obj <- RenameIdents(object = in_obj,
                         "B intermediate" = "B cells",
                         "B memory" = "B cells",
                         "NK Proliferating" = "NK",
                         "NK_CD56bright" = "NK")
  in_obj[["eb_idents"]] <- Idents(object = in_obj)
  return(in_obj)
}

day3_sub_annot <- consol_azi(day3_sub_annot)
day15_sub_annot <- consol_azi(day15_sub_annot)

saveRDS(day3_sub_annot, file = file.path(script_output_dir, "processed_data/7_sub_annot_d3.rds"))
saveRDS(day15_sub_annot, file = file.path(script_output_dir, "processed_data/7_sub_annot_d15.rds"))
day3_sub_annot <- readRDS(file = file.path(script_output_dir, "processed_data/7_sub_annot_d3.rds"))
day15_sub_annot <- readRDS(file = file.path(script_output_dir, "processed_data/7_sub_annot_d15.rds"))

Get rid of the one Day 3 cell where eb_idents is NA (causes errors down the line).

na_cell <- day3_sub_annot@meta.data %>%
  filter(is.na(eb_idents)) %>%
  rownames()
length(na_cell)
## [1] 0
day3_sub_annot <- subset(x = day3_sub_annot, idents = na_cell, invert = TRUE)

How many cells of each type?

Day 3

length(day3_sub_annot$orig.ident)  # Day 3 immune total
## [1] 6657
d3_immune_tally <- day3_sub_annot@meta.data %>%
  group_by(eb_idents) %>%
  tally()

d3_immune_tally %>%
  kable(caption = "Day 3 immune-level")
Day 3 immune-level
eb_idents n
B cells 62
NK 196
CD8 TEM 501
ILC 146
CD14 Mono 1981
CD4 TCM 2574
CD4 TEM 136
cDC2 180
CD8 TCM 112
dnT 32
MAIT 71
Platelet 145
Treg 85
CD16 Mono 139
CD8 Naive 63
gdT 84
pDC 58
CD8 Proliferating 6
CD4 Proliferating 48
cDC1 17
HSPC 18
ASDC 1
CD4 CTL 1
Eryth 1
table(day3_sub_annot$eb_idents, day3_sub_annot$PTID)
##                    
##                       1   5   7   8   9  10  11  12  13  16 Doublet Negative
##   B cells             6  11   1  14   9   2   0   6   1  12       0        0
##   NK                 28  41  12  19  17  11   0  10  33  25       0        0
##   CD8 TEM            41  60  47  30  98  34   0  48  77  66       0        0
##   ILC                24  31   7   3  20  11   0  16  17  17       0        0
##   CD14 Mono         177 227 101 397 224  69   0 338 248 200       0        0
##   CD4 TCM           139 243 114 299 279 166   0 456 423 455       0        0
##   CD4 TEM             5  22  11  10  33   9   0  11  15  20       0        0
##   cDC2               12  25  25  39  19  12   0  19   8  21       0        0
##   CD8 TCM             9   7   7  11  21   6   0   9  27  15       0        0
##   dnT                 2   1   2   1   3   5   0   3   4  11       0        0
##   MAIT                0   9   6   1  26   5   0   3  13   8       0        0
##   Platelet           11  22   8  36  13   8   0  17  14  16       0        0
##   Treg                4  15   4  10   8   4   0  11  16  13       0        0
##   CD16 Mono           7  34   4  24  18  14   0   9  12  17       0        0
##   CD8 Naive           2   0   1   3  12   0   0   3  24  18       0        0
##   gdT                 3   4   2  11   9   6   0   7  23  19       0        0
##   pDC                 3  14   3   4  14   1   0   2   9   8       0        0
##   CD8 Proliferating   0   0   0   0   4   0   0   1   1   0       0        0
##   CD4 Proliferating  12   3   1   9   5   1   0   7   6   4       0        0
##   cDC1                0   4   1   8   0   0   0   2   0   2       0        0
##   HSPC                2   7   3   1   0   1   0   1   2   1       0        0
##   ASDC                0   0   0   0   0   0   0   0   0   1       0        0
##   CD4 CTL             0   0   0   0   0   0   0   1   0   0       0        0
##   Eryth               0   0   0   0   0   0   0   0   0   1       0        0

Day 15

length(day15_sub_annot$orig.ident)  # Day 15 immune total
## [1] 2090
d15_immune_tally <- day15_sub_annot@meta.data %>%
  group_by(eb_idents) %>%
  tally()

d15_immune_tally %>%
  kable(caption = "Day 15 immune-level")
Day 15 immune-level
eb_idents n
B cells 22
NK 61
CD4 TCM 935
ILC 21
Platelet 79
MAIT 107
CD14 Mono 335
CD8 TEM 295
CD4 TEM 52
Treg 52
CD8 TCM 26
cDC2 41
pDC 6
cDC1 17
CD8 Naive 11
dnT 4
HSPC 14
CD4 Proliferating 10
CD16 Mono 1
gdT 1
table(day15_sub_annot$eb_idents, day15_sub_annot$PTID)
##                    
##                       1   5   7   8   9  10  11  12  13  16 Doublet Negative
##   B cells             2   0   0   0  10   0   2   0   6   2       0        0
##   NK                 10   0   0   0  15   5  17   0   8   6       0        0
##   CD4 TCM           204   0   0   0 261  63  82   0 237  88       0        0
##   ILC                 5   0   0   0   6   3   4   0   1   2       0        0
##   Platelet           26   0   0   0  13  19   9   0   4   8       0        0
##   MAIT                8   0   0   0  72   2  11   0  12   2       0        0
##   CD14 Mono          32   0   0   0 143  17  19   0  51  73       0        0
##   CD8 TEM            64   0   0   0  72  20  58   0  51  30       0        0
##   CD4 TEM             6   0   0   0  28   1   6   0  10   1       0        0
##   Treg               23   0   0   0  13   2   6   0   5   3       0        0
##   CD8 TCM             4   0   0   0   9   3   6   0   4   0       0        0
##   cDC2               23   0   0   0   5   1   9   0   1   2       0        0
##   pDC                 0   0   0   0   4   0   0   0   0   2       0        0
##   cDC1                9   0   0   0   3   1   3   0   1   0       0        0
##   CD8 Naive           0   0   0   0   6   0   2   0   3   0       0        0
##   dnT                 1   0   0   0   0   1   0   0   2   0       0        0
##   HSPC                2   0   0   0   2   2   3   0   2   3       0        0
##   CD4 Proliferating   1   0   0   0   2   1   2   0   3   1       0        0
##   CD16 Mono           0   0   0   0   1   0   0   0   0   0       0        0
##   gdT                 0   0   0   0   1   0   0   0   0   0       0        0

Remove types with < 5 cells

# Day 3
d3_imm_exclude <- d3_immune_tally %>%
  filter(n < 5) %>%
  pull(eb_idents)
print(d3_imm_exclude)
## [1] ASDC    CD4 CTL Eryth  
## 24 Levels: B cells NK CD8 TEM ILC CD14 Mono CD4 TCM CD4 TEM cDC2 ... Eryth
d3_subset <- subset(x = day3_sub_annot, 
                    idents = d3_imm_exclude, invert = TRUE)

# Day 15
d15_imm_exclude <- d15_immune_tally %>%
  filter(n < 5) %>%
  pull(eb_idents)
print(d15_imm_exclude)
## [1] dnT       CD16 Mono gdT      
## 20 Levels: B cells NK CD4 TCM ILC Platelet MAIT CD14 Mono CD8 TEM ... gdT
d15_subset <- subset(x = day15_sub_annot, 
                    idents = d15_imm_exclude, invert = TRUE)

Set factor levels and colors of immune subtypes

d3_subset@meta.data$eb_idents <- factor(d3_subset@meta.data$eb_idents,
                                levels = c("CD4 TCM", "CD4 TEM", "CD4 Proliferating",
                                           "CD8 TCM", "CD8 TEM", "CD8 Proliferating", "CD8 Naive", 
                                           "Treg", "dnT", "gdT", "MAIT", "CD14 Mono", "CD16 Mono",
                                           "cDC1", "cDC2", "pDC","ILC", "NK", "Platelet", "B cells",
                                           "HSPC"))

# No CD8 Proliferating, gdT, CD16 Mono, unlike D3
d15_subset@meta.data$eb_idents <- factor(d15_subset@meta.data$eb_idents,
                                levels = c("CD4 TCM", "CD4 TEM", "CD4 Proliferating",
                                           "CD8 TCM", "CD8 TEM", "CD8 Naive", 
                                           "Treg", "MAIT", "CD14 Mono",
                                           "cDC1", "cDC2", "pDC","ILC", "NK", "Platelet", "B cells",
                                           "HSPC"))
subtype_cols <- c(
  "B cells" = "darkorange", 
  "cDC1" = "mediumpurple", 
  "cDC2" = "mediumpurple",
  "pDC" = "mediumpurple",
  "ILC" = "darkgreen", 
  "CD14 Mono" = "#FF61CC", 
  "CD16 Mono" = "#FF61CC", 
  "NK" = "#0CB702", 
  "Platelet" = "gold", 
  "Tcm" = "dodgerblue", 
  "CD8 Naive" = "#00BFC4", 
  "CD8 Proliferating" = "#00BFC4", 
  "CD8 TEM" = "#00BFC4", 
  "CD4 Proliferating" = "darkblue", 
  "CD4 TEM" = "darkblue", 
  "CD4 TCM" = "darkblue",
  "Treg" = "cyan",
  "dnT" = "cyan", 
  "gdT" = "cyan", 
  "MAIT" = "cyan", 
  "HSPC" = "saddlebrown")

Visualize

d3_sub_plt <- DimPlot(d3_subset, group.by = "eb_idents", pt.size = 0.4,
        cols = subtype_cols) + 
  ggtitle("DAY 3") + 
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "bottom",
        legend.spacing.x = unit(0.4, 'cm'),
        legend.text = element_text(size=12, margin = margin(t = 1))) +
  guides(color = guide_legend(nrow = 3,
                              override.aes = list(size=4))) +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
d3_sub_plt

d15_sub_plt <- DimPlot(d15_subset, group.by = "eb_idents", pt.size = 0.4,
        cols = subtype_cols) + 
  ggtitle("DAY 15") + 
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "bottom",
        legend.spacing.x = unit(0.4, 'cm'),
        legend.text = element_text(size=12, margin = margin(t = 1))) +
  guides(color = guide_legend(nrow = 3,
                              override.aes = list(size=4))) +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
d15_sub_plt

# Get legend
subtype_legend <- cowplot::get_legend(d3_sub_plt)

# Plots without legend
d3_sub_noleg <- DimPlot(d3_subset, group.by = "eb_idents", pt.size = 0.4,
        cols = subtype_cols) + 
  ggtitle("DAY 3") + 
  theme(plot.title = element_text(hjust = 0.5, size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2") 

d15_sub_noleg <- DimPlot(d15_subset, group.by = "eb_idents", pt.size = 0.4,
        cols = subtype_cols) + 
  ggtitle("DAY 15") + 
  theme(plot.title = element_text(hjust = 0.5, size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

Save objects

saveRDS(d3_subset, file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d3.rds"))
saveRDS(d15_subset, file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d15.rds"))

d3_subset <- readRDS(file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d3.rds"))
d15_subset <- readRDS(file = file.path(script_output_dir, "processed_data/8_annot_sub_final_d15.rds"))

Number of immune cells

length(d3_subset$orig.ident)  # Day 3
## [1] 6654
length(d15_subset$orig.ident)  # Day 15
## [1] 2084

Seurat clusters

d1 <- DimPlot(d3_subset, pt.size = 0.4, group.by = "seurat_clusters", label = TRUE) + 
  NoLegend() +
  ggtitle("Day 3")

d2 <- DimPlot(d15_subset, pt.size = 0.4, group.by = "seurat_clusters", label = TRUE) + 
  NoLegend() +
  ggtitle("Day 15")

d1 + d2

By PTID

ptid_colors <- c("1" = "salmon", "5" = "orange", "7" = "#7570B3",
                 "8" = "#0CB702", "9" = "#A6761D", "10" = "#00BFC4",
                 "11" = "#C77CFF", "12" = "#FF61CC", "13" = "darkgreen",
                 "16" = "darkblue")

d3 <- DimPlot(d3_subset, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) + 
  NoLegend() +
  ggtitle("Day 3")

d4 <- DimPlot(d15_subset, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) + 
  NoLegend() +
  ggtitle("Day 15")

d3 + d4

By ARM

d5 <- DimPlot(d3_subset, pt.size = 0.4, group.by = "ARM") +
  NoLegend() +
  ggtitle("Day 3")

d6 <- DimPlot(d15_subset, pt.size = 0.4, group.by = "ARM") + 
  NoLegend() +
  ggtitle("Day 15")

d5 + d6

QC Azimuth

Look at confidence and mapping scores. Note that Azimuth doesn’t have neutrophils in its reference.

Confidence Score

  • 0 to 1 (high confidence is > 0.75)
  • Reflects the confidence associated with each annotation
  • Cells with high-confidence annotations reflect predictions that are supported by multiple consistent anchors
ft1 <- FeaturePlot(d3_subset, features = "predicted.celltype.l2.score", 
            label = TRUE, 
            repel = TRUE, 
            pt.size = 0.4) +
  ggtitle("Day 3 confidence scores") +
  NoLegend()

ft2 <- FeaturePlot(d15_subset, features = "predicted.celltype.l2.score", 
            label = TRUE, 
            repel = TRUE,
            pt.size = 0.4) +
  ggtitle("Day 15 confidence scores")

cowplot::plot_grid(ft1, ft2)

Mapping Score

  • 0 to 1
  • Reflects confidence that this cell is well represented by the reference
  • Reflects how well the unique structure of a cell’s local neighborhood is preserved during reference mapping
ft3 <- FeaturePlot(d3_subset, features = "mapping.score", 
            label = TRUE, 
            repel = TRUE,
            pt.size = 0.6) +
  ggtitle("Day 3 mapping scores") +
  NoLegend()

ft4 <- FeaturePlot(d15_subset, features = "mapping.score", 
            label = TRUE, 
            repel = TRUE,
            pt.size = 0.6) +
  ggtitle("Day 15 mapping scores")

cowplot::plot_grid(ft3, ft4)

Neutrophils

Neutrophils aren’t in the Azimuth references, and so they’d be annotated as CD14 monocytes (possibly with low mapping scores). See:

  • “What if my query dataset contains cell types that aren’t present in the reference?” in the docs
  • This GitHub issue

Here, I check for low mapping scores and neutrophil markers in CD14 monocytes (none express CEACAM5). There doesn’t seem to be a defined neutrophil population at either timepoint

d3_cd14mono <- subset(d3_subset, subset = eb_idents == "CD14 Mono")
d15_cd14mono <- subset(d15_subset, subset = eb_idents == "CD14 Mono")

Mapping scores

ft5 <- FeaturePlot(d3_cd14mono, features = "mapping.score", pt.size = 0.6,
                   label = TRUE) +
  ggtitle("Day 3 mapping scores") +
  NoLegend()

ft6 <- FeaturePlot(d15_cd14mono, features = "mapping.score", pt.size = 0.6,
                    label = TRUE) +
  ggtitle("Day 15 mapping scores")

cowplot::plot_grid(ft5, ft6)

Neutrophil marker expression

# No clear pattern, especially not connected to the mapping scores
ft7 <- FeaturePlot(d3_cd14mono, features = c("CEACAM1", "CEACAM6", "ITGAM", 
                                             "FCGR3A", "FUT4", "AZU1", 
                                             "ELANE", "MPO"), pt.size = 0.1)
ft8 <- FeaturePlot(d15_cd14mono, features = c("CEACAM1", "CEACAM6", "ITGAM", 
                                              "FCGR3A", "FUT4", "AZU1", 
                                              "ELANE", "MPO"), pt.size = 0.1)

cowplot::plot_grid(ft7, labels = "Day 3")

cowplot::plot_grid(ft8, labels = "Day 15")

Immune GEX markers

  • T cell: CD3D
  • Monocyte: CTSS
  • DC: HLA-DPA1
  • NK: NKG7
  • B cell: MS4A1

Day 3

d3_11 <- make_marker_plot(d3_subset, "CD3D")
d3_12 <- make_marker_plot(d3_subset, "CTSS")
d3_13 <- make_marker_plot(d3_subset, "HLA-DPA1")
d3_14 <- make_marker_plot(d3_subset, "NKG7")
d3_15 <- make_marker_plot(d3_subset, "MS4A1")

d3_grid_imm <- plot_grid(d3_11, d3_12, d3_13, d3_14, d3_15, nrow = 3)
d3_grid_imm + DimPlot(d3_subset, pt.size = 0.4, group.by = "eb_idents",
                      cols = subtype_cols)

Day 15

# TODO: Decide on final order to make plots (maybe look at mast cells earlier, include those in immmune subset and see where they cluster for further confirmation they should be their own thing)

d15_11 <- make_marker_plot(d15_subset, "CD3D")
d15_12 <- make_marker_plot(d15_subset, "CTSS")
d15_13 <- make_marker_plot(d15_subset, "HLA-DPA1")
# Some monocytes expressing this, but not the other markers for NK-like monocytes
d15_14 <- make_marker_plot(d15_subset, "NKG7")
d15_15 <- make_marker_plot(d15_subset, "MS4A1")

d15_grid_imm <- plot_grid(d15_11, d15_12, d15_13, d15_14, d15_15, nrow = 2)
d15_grid_imm + DimPlot(d15_subset, pt.size = 0.4, group.by = "eb_idents",
                       cols = subtype_cols)

Replace SingleR labels with Azimuth ones

Following instructions from issue #1748. I’ll replace the SingleR_prunedlabels with the more detailed Azimuth ones to create a ‘detailed’ meta.data column. I’ll replace my consolidated SingleR annotations with my consolidated Azimuth ones to create a ‘consolidated’ meta.data column.

replace_labels <- function(main_obj, sub_obj) {
  # 'detailed' labels
  Idents(main_obj) <- "SingleR_prunedlabels"
  Idents(sub_obj) <- "predicted.celltype.l2"
  main_obj$detailed <- as.character(Idents(main_obj))
  main_obj$detailed[Cells(sub_obj)] <- as.character(Idents(sub_obj))
  # 'consolidated' labels
  Idents(main_obj) <- "eb_atlas_idents"
  Idents(sub_obj) <- "eb_idents"
  main_obj$consolidated <- as.character(Idents(main_obj))
  main_obj$consolidated[Cells(sub_obj)] <- as.character(Idents(sub_obj))
  return(main_obj)
}

day3_clstr_filt <- replace_labels(day3_clstr_filt, d3_subset)
day15_clstr_filt <- replace_labels(day15_clstr_filt, d15_subset)

Day 15 Unknowns

One group of cells seem to be mast cells based on:

  • Tight clustering with eachother and away from other clusters (even though distances in UMAPs are not meaningful)
  • ‘Unknown’ annotation (Azimuth and SingleR do not have a mast cell category)
  • Relatively high expression of mast cell marker TPSAB1 in all of these cells

These are part of a Seurat cluster that’s spread around the UMAP, so I’ll re-annotate based on TPSAB1 expression.

mastplt <- FeaturePlot(day15_clstr_filt, features = "TPSAB1")
mastplt

# Get and look at the cells expressing high levels of TPSAB1
mast_cells <- WhichCells(day15_clstr_filt, expression = TPSAB1 > 1.1)
DimPlot(day15_clstr_filt, cells.highlight = list(mast_cells))

# Re-annotate
day15_clstr_filt@meta.data <- day15_clstr_filt@meta.data %>%
  mutate(barcode = rownames(.)) %>%
  mutate(consolidated = case_when(
    barcode %in% mast_cells ~ "Mast cells",
    .default = consolidated
  ))

Note that there doesn’t seem to be a mast cell cluster at Day 3.

FeaturePlot(day3_clstr_filt, features = "TPSAB1")

Update atlas-level labels for Myeloid and Lymphoid

Use Azimuth annotations to decide if a cell is Myeloid or Lymphoid

# Leave these cell types as-is: HSPC, pDC, cDC1, cDC2
myeloid_type <- c("CD14 Mono", "CD16 Mono", "Platelet", "Mast cells")

lymphoid_type <- c("B cells", "CD4 Proliferating", "CD4 TCM", "CD4 TEM",
                   "CD8 Naive", "CD8 Proliferating", "CD8 TCM", "CD8 TEM",
                   "dnT", "gdT", "MAIT", "NK", "ILC", "Treg")

# Day 3
day3_clstr_filt@meta.data <- day3_clstr_filt@meta.data %>%
  mutate(eb_atlas_idents = case_when(
    consolidated %in% myeloid_type ~ "Myeloid",
    consolidated %in% lymphoid_type ~ "Lymphoid",
    .default = eb_atlas_idents
  ))

# Day 15
day15_clstr_filt@meta.data <- day15_clstr_filt@meta.data %>%
  mutate(eb_atlas_idents = case_when(
    consolidated %in% myeloid_type ~ "Myeloid",
    consolidated %in% lymphoid_type ~ "Lymphoid",
    .default = eb_atlas_idents
  ))

Save

# These are necessary for the next script
saveRDS(day3_clstr_filt, file = file.path(script_output_dir, "processed_data/9_final_annot_d3.rds"))
saveRDS(day15_clstr_filt, file = file.path(script_output_dir, "processed_data/9_final_annot_d15.rds"))

Session Info

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/Los_Angeles
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] cowplot_1.1.1               Azimuth_0.5.0              
##  [3] shinyBS_0.61.1              knitr_1.45                 
##  [5] celldex_1.10.1              SingleR_2.2.0              
##  [7] SummarizedExperiment_1.30.2 Biobase_2.60.0             
##  [9] GenomicRanges_1.52.1        GenomeInfoDb_1.36.4        
## [11] IRanges_2.34.1              S4Vectors_0.38.2           
## [13] BiocGenerics_0.46.0         MatrixGenerics_1.12.3      
## [15] matrixStats_1.0.0           lubridate_1.9.3            
## [17] forcats_1.0.0               stringr_1.5.0              
## [19] dplyr_1.1.3                 purrr_1.0.2                
## [21] readr_2.1.5                 tidyr_1.3.1                
## [23] tibble_3.2.1                ggplot2_3.4.4              
## [25] tidyverse_2.0.0             Seurat_5.0.1               
## [27] SeuratObject_5.0.1          sp_2.1-1                   
## 
## loaded via a namespace (and not attached):
##   [1] R.methodsS3_1.8.2                 progress_1.2.3                   
##   [3] poweRlaw_0.70.6                   goftest_1.2-3                    
##   [5] DT_0.30                           Biostrings_2.68.1                
##   [7] vctrs_0.6.4                       spatstat.random_3.2-1            
##   [9] pbmcref.SeuratData_1.0.0          digest_0.6.33                    
##  [11] png_0.1-8                         ggrepel_0.9.4                    
##  [13] deldir_1.0-9                      parallelly_1.36.0                
##  [15] lungref.SeuratData_2.0.0          MASS_7.3-60.0.1                  
##  [17] Signac_1.12.0                     MAST_1.26.0                      
##  [19] reshape2_1.4.4                    httpuv_1.6.12                    
##  [21] withr_3.0.0                       xfun_0.40                        
##  [23] ellipsis_0.3.2                    survival_3.5-8                   
##  [25] EnsDb.Hsapiens.v86_2.99.0         memoise_2.0.1                    
##  [27] zoo_1.8-12                        gtools_3.9.4                     
##  [29] pbapply_1.7-2                     R.oo_1.25.0                      
##  [31] prettyunits_1.2.0                 KEGGREST_1.40.1                  
##  [33] promises_1.2.1                    cbmc.SeuratData_3.1.4            
##  [35] httr_1.4.7                        restfulr_0.0.15                  
##  [37] globals_0.16.2                    fitdistrplus_1.1-11              
##  [39] rhdf5filters_1.12.1               rhdf5_2.44.0                     
##  [41] rstudioapi_0.15.0                 miniUI_0.1.1.1                   
##  [43] generics_0.1.3                    pbmc3k.SeuratData_3.1.4          
##  [45] curl_5.0.2                        zlibbioc_1.46.0                  
##  [47] ScaledMatrix_1.8.1                polyclip_1.10-6                  
##  [49] GenomeInfoDbData_1.2.10           ExperimentHub_2.8.1              
##  [51] interactiveDisplayBase_1.38.0     xtable_1.8-4                     
##  [53] pracma_2.4.2                      evaluate_0.22                    
##  [55] S4Arrays_1.2.0                    BiocFileCache_2.8.0              
##  [57] hms_1.1.3                         irlba_2.3.5.1                    
##  [59] colorspace_2.1-0                  filelock_1.0.2                   
##  [61] hdf5r_1.3.8                       ROCR_1.0-11                      
##  [63] reticulate_1.34.0                 spatstat.data_3.0-3              
##  [65] magrittr_2.0.3                    lmtest_0.9-40                    
##  [67] glmGamPoi_1.12.2                  later_1.3.1                      
##  [69] viridis_0.6.4                     lattice_0.22-5                   
##  [71] spatstat.geom_3.2-7               future.apply_1.11.0              
##  [73] scattermore_1.2                   XML_3.99-0.14                    
##  [75] RcppAnnoy_0.0.21                  pillar_1.9.0                     
##  [77] nlme_3.1-163                      caTools_1.18.2                   
##  [79] compiler_4.3.3                    beachmat_2.16.0                  
##  [81] RSpectra_0.16-1                   stringi_1.8.3                    
##  [83] tensor_1.5                        GenomicAlignments_1.36.0         
##  [85] plyr_1.8.9                        crayon_1.5.2                     
##  [87] abind_1.4-5                       BiocIO_1.10.0                    
##  [89] googledrive_2.1.1                 bit_4.0.5                        
##  [91] fastmatch_1.1-4                   codetools_0.2-19                 
##  [93] BiocSingular_1.16.0               bslib_0.5.1                      
##  [95] SeuratData_0.2.2.9001             plotly_4.10.3                    
##  [97] mime_0.12                         splines_4.3.3                    
##  [99] Rcpp_1.0.11                       fastDummies_1.7.3                
## [101] dbplyr_2.3.4                      sparseMatrixStats_1.12.2         
## [103] cellranger_1.1.0                  blob_1.2.4                       
## [105] utf8_1.2.4                        here_1.0.1                       
## [107] BiocVersion_3.17.1                seqLogo_1.68.0                   
## [109] AnnotationFilter_1.26.0           fs_1.6.3                         
## [111] listenv_0.9.0                     DelayedMatrixStats_1.22.6        
## [113] panc8.SeuratData_3.0.2            Matrix_1.6-4                     
## [115] tzdb_0.4.0                        pkgconfig_2.0.3                  
## [117] pheatmap_1.0.12                   tools_4.3.3                      
## [119] cachem_1.0.8                      RSQLite_2.3.1                    
## [121] viridisLite_0.4.2                 DBI_1.2.1                        
## [123] fastmap_1.1.1                     rmarkdown_2.25                   
## [125] scales_1.3.0                      grid_4.3.3                       
## [127] ica_1.0-3                         ifnb.SeuratData_3.0.0            
## [129] shinydashboard_0.7.2              Rsamtools_2.16.0                 
## [131] AnnotationHub_3.8.0               sass_0.4.7                       
## [133] patchwork_1.1.3                   BiocManager_1.30.22              
## [135] dotCall64_1.1-0                   RANN_2.6.1                       
## [137] farver_2.1.1                      yaml_2.3.8                       
## [139] rtracklayer_1.60.1                cli_3.6.2                        
## [141] leiden_0.4.3                      lifecycle_1.0.4                  
## [143] uwot_0.1.16                       presto_1.0.0                     
## [145] BSgenome.Hsapiens.UCSC.hg38_1.4.5 BiocParallel_1.34.2              
## [147] annotate_1.78.0                   timechange_0.3.0                 
## [149] gtable_0.3.4                      rjson_0.2.21                     
## [151] ggridges_0.5.4                    progressr_0.14.0                 
## [153] parallel_4.3.3                    jsonlite_1.8.7                   
## [155] RcppHNSW_0.5.0                    TFBSTools_1.40.0                 
## [157] bitops_1.0-7                      bit64_4.0.5                      
## [159] Rtsne_0.16                        spatstat.utils_3.0-4             
## [161] CNEr_1.38.0                       jquerylib_0.1.4                  
## [163] highr_0.10                        shinyjs_2.1.0                    
## [165] SeuratDisk_0.0.0.9020             R.utils_2.12.2                   
## [167] lazyeval_0.2.2                    shiny_1.7.5.1                    
## [169] htmltools_0.5.7                   GO.db_3.17.0                     
## [171] sctransform_0.4.1                 rappdirs_0.3.3                   
## [173] ensembldb_2.26.0                  glue_1.7.0                       
## [175] TFMPvalue_0.0.9                   spam_2.10-0                      
## [177] googlesheets4_1.1.1               XVector_0.40.0                   
## [179] RCurl_1.98-1.12                   rprojroot_2.0.3                  
## [181] BSgenome_1.70.1                   gridExtra_2.3                    
## [183] JASPAR2020_0.99.10                igraph_1.5.1                     
## [185] R6_2.5.1                          SingleCellExperiment_1.22.0      
## [187] RcppRoll_0.3.0                    labeling_0.4.3                   
## [189] GenomicFeatures_1.54.1            cluster_2.1.6                    
## [191] Rhdf5lib_1.22.1                   gargle_1.5.2                     
## [193] DirichletMultinomial_1.44.0       DelayedArray_0.26.7              
## [195] tidyselect_1.2.0                  ProtGenerics_1.34.0              
## [197] xml2_1.3.5                        AnnotationDbi_1.62.2             
## [199] future_1.33.0                     rsvd_1.0.5                       
## [201] munsell_0.5.0                     KernSmooth_2.23-22               
## [203] data.table_1.15.0                 htmlwidgets_1.6.2                
## [205] RColorBrewer_1.1-3                biomaRt_2.58.0                   
## [207] rlang_1.1.1                       spatstat.sparse_3.0-3            
## [209] spatstat.explore_3.2-5            fansi_1.0.6